#!/usr/bin/python
import sys
import urllib2
import re
import getopt

def usage():
	"""Print the command-line help text for the crawler to standard output."""
	# The example uses -s/-e: those are the page-range options listed in the
	# usage line; the previous "-p 2 5" form is not an accepted option.
	print("""
Download pictures for one thread of Tieba.  
Usage: ./TiebaCrawler [-s start_page] [-e end_page] [-o save_loc] [-t jpg|text] url
Example: ./TiebaCrawler -s 2 -e 5 -o Test/ -t jpg http://tieba.baidu.com/p/1374578507""")

def _fetch(url):
	"""Return the full response body of *url*, closing the connection afterwards."""
	response = urllib2.urlopen(url)
	try:
		return response.read()
	finally:
		# urllib2 responses are not context managers, so close explicitly.
		response.close()


def main():
	"""Parse the command line and download every matching JPG of a thread.

	Options (read from ``sys.argv``):
	    -s N     first page to fetch (default 1)
	    -e N     last page to fetch, inclusive (default 10)
	    -o DIR   directory the images are written to (default "./")
	    -t TYPE  "jpg" or "text"; parsed but not acted on below
	    url      thread URL (required positional argument)

	For each page in the range, the page is requested as ``url?pn=<page>``
	and every image URL matching the imgsrc.baidu.com pattern is saved as
	``<page * 1000 + index>.jpg`` inside the output directory.

	Exits with status 2 (after printing the usage text) when the options
	are invalid, a page number is not an integer, or the URL is missing.
	"""
	# Local import keeps this block self-contained; the file header does
	# not import os.
	import os

	try:
		optlist, args = getopt.getopt(sys.argv[1:], 's:e:o:t:')
	except getopt.GetoptError:
		usage()
		sys.exit(2)

	# Without this guard a missing URL surfaced as a bare IndexError.
	if not args:
		usage()
		sys.exit(2)

	url = args[0]
	start_page = 1
	end_page = 10
	loc = "./"
	# NOTE(review): the -t flag is parsed but nothing below reads `jpg`;
	# text mode appears to be unimplemented.
	jpg = True
	try:
		for o, a in optlist:
			if o == "-s":
				start_page = int(a)
			elif o == "-e":
				end_page = int(a)
			elif o == "-o":
				loc = a
			elif o == "-t":
				if a == "jpg":
					jpg = True
				elif a == "text":
					jpg = False
	except ValueError:
		# Non-numeric -s / -e value: report it like any other bad option.
		usage()
		sys.exit(2)

	print("url:" + url)
	print("start page:%d" % start_page)
	print("end page:%d" % end_page)
	print("loc: " + loc)

	# Compiled once for all pages; the dots in the host name are escaped so
	# they only match a literal ".".
	img_pattern = re.compile(
		r"http://imgsrc\.baidu\.com/forum/pic/item/\w*\.jpg")

	for page in range(start_page, end_page + 1):
		real_url = url + "?pn=" + str(page)
		print("Download " + real_url)
		content = _fetch(real_url)
		# File names start at page * 1000 so images from different pages
		# get distinct numbers (as long as a page has under 1000 matches).
		for cnt, pic_url in enumerate(img_pattern.findall(content), page * 1000):
			print("Fetch: " + pic_url)
			data = _fetch(pic_url)
			# os.path.join makes "-o Test" and "-o Test/" behave the same.
			file_name = os.path.join(loc, str(cnt) + ".jpg")
			# Binary mode: JPG bytes must not go through newline translation.
			with open(file_name, 'wb') as local_file:
				local_file.write(data)

# Run the crawler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
	main()
